package com.o2o.cleaning.month.platform.ebusiness_plat.jumei_2019_7.Jumei_utils

import org.apache.spark.sql.{DataFrame, SparkSession}
import com.o2o.cleaning.month.platform.ebusiness_plat.jumei_2019_7.JuMei
//import platform.jumei.jumei_th1.JuMei

/**
  * @Description TODO 
  * @Author liutaowei
  * @Date 2018/11/22 12:12
  */
object cate_util {

  def jumei_cate_util(data:DataFrame,spark:SparkSession):DataFrame={
//    不需要拆分的分类ID
      val noNeed_more_data = excat_noNeed_more_data(data)
//    需要拆分的分类ID
      val Need_more_data = excat_Need_more_data(data)
//    拆分分类的代码
      val data_001 = xiuzheng_need_data(Need_more_data,spark)
//    需要要拆分的分类直接关联分类表
      val data_002 = relate_cate(noNeed_more_data,spark)
//    将拆分完毕的分类与关联完毕分类表的数据进行合并
    val res = data_001.toJSON.rdd.union(data_002.toJSON.rdd)

//    将需要和不需要拆分的数据合并之后，从10099分类里面或者是从None分类进行补全
    val res_n = spark.read.json(res)
          .selectExpr("*"
//            1009999
            //         ,"length(firstCategoryId) t1","length(secondCategoryId) t2","length(thirdCategoryId) t3","length(fourthCategoryId) t4"
            ,"case when length(secondCategoryId) != 7 then concat(firstCategoryId,'99') else secondCategoryId end second"
            ,"case when length(thirdCategoryId) - length(secondCategoryId) = 0 then concat(firstCategoryId,'9999') " +
              " when  length(thirdCategoryId) - length(secondCategoryId) < 0 then concat(secondCategoryId,'99')" +
              " else thirdCategoryId end third"
            ,"case when length(fourthCategoryId) - length(secondCategoryId) = 0 then concat(firstCategoryId,'999999') " +
              " when  length(fourthCategoryId) - length(thirdCategoryId) = 0 then concat(secondCategoryId,'9999')" +
              " when length(fourthCategoryId) - length(thirdCategoryId) < 0  then concat(thirdCategoryId,'99')" +
              " else fourthCategoryId end four"
          ).drop("secondCategoryId","thirdCategoryId","fourthCategoryId")
          .withColumnRenamed("second","secondCategoryId")
          .withColumnRenamed("third","thirdCategoryId")
          .withColumnRenamed("four","fourthCategoryId")
    res_n
  }

//   不需要需改分类的ID
  def excat_noNeed_more_data(data:DataFrame) = {
    val no_need_data = data.where(
      """
        |subCategoryId !='2532'
        |and  subCategoryId !='2338'
        |and  subCategoryId !='2157'
        |and  subCategoryId !='2075'
        |and  subCategoryId !='1964'
        |and  subCategoryId !='1939'
        |and  subCategoryId !='1935'
        |and  subCategoryId !='1813'
        |and  subCategoryId !='1439'
        |and  subCategoryId !='1436'
        |and  subCategoryId !='1435'
        |and  subCategoryId !='1393'
        |and  subCategoryId !='511'
        |and  subCategoryId !='747'
        |and  subCategoryId !='509'
        |and  subCategoryId !='458'
        |and  subCategoryId !='405'
        |
        |and  subCategoryId !='2195'
        |and  subCategoryId !='2488'
        |and  subCategoryId !='2520'
        |and  subCategoryId !='2598'
        |and  subCategoryId !='2744'
        |and  subCategoryId !='2746'
        |and  subCategoryId !='838'
        |and  subCategoryId !='400'
        |and  subCategoryId !='1299'
        |and  subCategoryId !='1438'
        |and  subCategoryId !='1893'
        |and  subCategoryId !='1895'
        |and  subCategoryId !='1441'
        |and  subCategoryId !='1508'
        |and  subCategoryId !='1675'
        |and  subCategoryId !='1515'
        |and  subCategoryId !='1504'
        |and  subCategoryId !='1665'
        |and  subCategoryId !='2424'
        |and  subCategoryId !='1293'
      """.stripMargin)

    no_need_data
  }

//  需要拆分的分类ID
  def excat_Need_more_data(data:DataFrame) = {
    val need_more_data = data.where(
      """
        |subCategoryId ='2532'
        |or  subCategoryId ='2338'
        |or  subCategoryId ='2157'
        |or  subCategoryId ='2075'
        |or  subCategoryId ='1964'
        |or  subCategoryId ='1939'
        |or  subCategoryId ='1935'
        |or  subCategoryId ='1813'
        |or  subCategoryId ='1439'
        |or  subCategoryId ='1436'
        |or  subCategoryId ='1435'
        |or  subCategoryId ='1393'
        |or  subCategoryId ='511'
        |or  subCategoryId ='747'
        |or  subCategoryId ='509'
        |or  subCategoryId ='458'
        |or  subCategoryId ='405'
        |or  subCategoryId ='2195'
        |or  subCategoryId ='2488'
        |or  subCategoryId ='2520'
        |or  subCategoryId ='2598'
        |or  subCategoryId ='2744'
        |or  subCategoryId ='2746'
        |or  subCategoryId ='838'
        |or  subCategoryId ='400'
        |or  subCategoryId ='1299'
        |or  subCategoryId ='1438'
        |or  subCategoryId ='1893'
        |or  subCategoryId ='1895'
        |or  subCategoryId ='1441'
        |or  subCategoryId ='1675'
        |or  subCategoryId ='1508'
        |or  subCategoryId ='1515'
        |or  subCategoryId ='1504'
        |or  subCategoryId ='1665'
        |or  subCategoryId ='2424'
        |or  subCategoryId ='1293'
      """.stripMargin)

    need_more_data
  }

//  拆分分类的代码
  def xiuzheng_need_data(need_data:DataFrame,spark:SparkSession) = {
//    这个是全量数据
      need_data.registerTempTable("jumei_diff")
      val data_001 = spark.sqlContext.sql(
        """
          |select
          |case
          |when subCategoryId ='2532'   then   (case when title rlike '笔记本电脑' then '10016' else '10016' end)
          |when subCategoryId ='2338'   then   (case when title rlike '牙膏' then '10019' else '10019' end)
          |when subCategoryId ='2157'   then   (case when title rlike '裙子'  then '10011' else '10011' end)
          |when subCategoryId ='2075'   then   (case when title rlike '男装西裤' then '10011' else '10011' end)
          |when subCategoryId ='1964'   then   (case when title rlike '方便面' then '10021'
          |                                          when title rlike '调味品' then '10021'
          |
          |                                          else '10021' end)
          |when subCategoryId ='1939'   then   (case when title rlike '水果'  then '10021' else '10021' end)
          |when subCategoryId ='1935'   then   (case when title rlike '方便面' then '10021'
          |                                          when title rlike '调味品' then '10021'
          |
          |                                          else '10021' end)
          |when subCategoryId ='1813'   then   (case when title rlike '其他女装' then '10011'
          |                                          when title rlike '童装' then '10020'
          |
          |                                          else '10011' end)
          |when subCategoryId ='1439'   then   (case when title rlike '其他个护化妆'  then '10019' else '10020' end)
          |when subCategoryId ='1436'   then   (case when title rlike '沐浴'  then '10019' else '10020' end)
          |when subCategoryId ='1435'   then   (case when title rlike '宝宝洗浴'  then '10020'  else '10019' end)
          |when subCategoryId ='1393'   then   (case when title rlike '面条' then '10021' else '10021' end)
          |when subCategoryId ='747'    then   (case when title rlike '男靴' then '10011'
          |                                          when title rlike '休闲鞋' then '10011'
          |                                          when title rlike '户外鞋' then '10013'
          |                                          else '10013' end)
          |when subCategoryId ='511'    then   (case when title rlike '其他箱包皮具' then '10012' else '10012' end)
          |when subCategoryId ='509'    then   (case when title rlike '其他箱包皮具' then '10012' else '10012' end)
          |when subCategoryId ='458'    then   (case when title rlike '蒸汽' and title rlike '眼罩'  then '10025' else '10019' end)
          |
          |when subCategoryId = '2195' then '10019'
          |when subCategoryId='2488' and title rlike '车载' then '10018'
          |when subCategoryId='2488' then '10015'
          |when subCategoryId='838' then '10011'
          |when subCategoryId='838' then '10011'
          |when subCategoryId='2520' then '10014'
          |when subCategoryId='405' then '10019'
          |when subCategoryId='1438' then '10019'
          |when subCategoryId='1299' then '10019'
          |when subCategoryId='2744' then '10011'
          |when subCategoryId='2746' then '10011'
          |when subCategoryId='2598' then '10018'
          |when subCategoryId='1508' then '10025'
          |when subCategoryId ='1895' then '10019'
          |when subCategoryId ='1893' then '10019'
          |when subCategoryId ='400' then (case when title rlike '电动' or title rlike '震动' or title rlike '声波' or title rlike '振动' then '10014' else '10019' end)
          |when subCategoryId ='1441' or subCategoryId ='1675'
          |     then (case when title rlike '洗衣液' or title rlike '洗衣粉' or title rlike '消毒液' then '10022'
          |                when title rlike '牙膏' then '10014'
          |                else '10022' end)
          |when subCategoryId ='1515'  or subCategoryId ='1504'  or subCategoryId ='1665'
          |     then   (case when title rlike '蒸汽'  and title rlike '眼罩' then '10025'
          |                  else '10022'
          |                  end)
          |when subCategoryId='2424' then (case when title rlike '牙膏' then '10019' else '10014' end )
          |when subCategoryId='1293' then (case when title rlike '牙膏' then '10019' else '10014' end )
          |
          |
          |
          |else '10099'
          |end firstCategoryId,
          |
          |case
          |when subCategoryId ='2532'   then   (case when title rlike '笔记本电脑' then '1001601' else '1001605' end)
          |when subCategoryId ='2338'   then   (case when title rlike '牙膏' then '1001907' else '1001907' end)
          |when subCategoryId ='2157'   then   (case when title rlike '裙子'  then '1001102' else '1001102' end)
          |when subCategoryId ='2075'   then   (case when title rlike '男装西裤' then '1001101' else '1001102' end)
          |when subCategoryId ='1964'   then   (case when title rlike '方便面' then '1002102'
          |                                          when title rlike '调味品' then '1002102'
          |
          |                                          else '1002102' end)
          |when subCategoryId ='1939'   then   (case when title rlike '水果'  then '1002103' else '1002103' end)
          |when subCategoryId ='1935'   then   (case when title rlike '方便面' then '1002102'
          |                                          when title rlike '调味品' then '1002102'
          |
          |                                          else '1002102' end)
          |when subCategoryId ='1813'   then   (case when title rlike '其他女装' then '1001102'
          |                                          when title rlike '童装' then '1002006'
          |
          |                                          else '1001105' end)
          |when subCategoryId ='1439'   then   (case when title rlike '其他个护化妆'  then '1001999' else '1002004' end)
          |when subCategoryId ='1436'   then   (case when title rlike '沐浴'  then '1001906' else '1002004' end)
          |when subCategoryId ='1435'   then   (case when title rlike '宝宝洗浴'  then '1002004'  else '1001904' end)
          |when subCategoryId ='1393'   then   (case when title rlike '面条' then '1002102' else '1002102' end)
          |when subCategoryId ='747'    then   (case when title rlike '男靴' then '1001103'
          |                                          when title rlike '休闲鞋' then '1001103'
          |                                          when title rlike '户外鞋' then '1001303'
          |                                          else '1001301' end)
          |when subCategoryId ='511'    then   (case when title rlike '其他箱包皮具' then '1001299' else '1001299' end)
          |when subCategoryId ='509'    then   (case when title rlike '其他箱包皮具' then '1001299' else '1001299' end)
          |when subCategoryId ='458'    then   (case when title rlike '蒸汽' and title rlike '眼罩'  then '1002510' else '1001902' end)
          |when subCategoryId='2195' then '1001908'
          |when subCategoryId='2488' then (case when  title rlike '车载' then '1001803' else  '1001501' end)
          |when subCategoryId='838' then '1001102'
          |when subCategoryId='2520' then '1001499'
          |when subCategoryId='2744' then '1001102'
          |when subCategoryId='2746' then '1001102'
          |when subCategoryId='2598' then '1001803'
          |when subCategoryId='405' then '1001906'
          |when subCategoryId='1299' then '1001906'
          |when subCategoryId='1438' then '1001906'
          |when subCategoryId ='400'   then   (case when title rlike '电动' or title rlike '震动' or title rlike '声波' or title rlike '振动' then '1001404' else '1001907' end)
          |when subCategoryId ='1895' or subCategoryId ='1893' then
          |                       (case when title rlike '面膜'  then '1001901'
          |                        when title rlike '面霜'  then '1001901'
          |                        when title rlike '卸妆'  then '1001902'
          |                        when title rlike '精萃' or title rlike '精萃' then '1001901'
          |                        when title rlike '唇' or title rlike '口红' then '1001902'
          |                        else '1001999' end
          |                       )
          |when subCategoryId ='1441'  or subCategoryId ='1675'
          |     then (case when title rlike '洗衣液' or title rlike '洗衣粉' or title rlike '消毒液' then '1002202'
          |                when title rlike '牙膏' then '1001404'
          |                else '1002202' end)
          |when subCategoryId ='1515'  or subCategoryId ='1504'  or subCategoryId ='1665'
          |     then   (case when title rlike '蒸汽'  and title rlike '眼罩' then '1002510'
          |                  else '1002204'
          |                  end)
          |when subCategoryId='2424' then (case when title rlike '牙膏' then '1001907' else '1001404' end )
          |when subCategoryId='1293' then (case when title rlike '牙膏' then '1001907' else '1001404' end )
          |when subCategoryId='1508' then "1002503"
          |
          |else 'None'
          |end secondCategoryId,
          |
          |case
          |when subCategoryId ='2532'   then   (case when title rlike '笔记本电脑' then '100160101' else 'None' end)
          |when subCategoryId ='2338'   then   (case when title rlike '牙膏' then '100190701' else 'None' end)
          |when subCategoryId ='2157'   then   (case when title rlike '裙子'  then '100110203' else '100110299' end)
          |when subCategoryId ='2075'   then   (case when title rlike '男装西裤' then '100110103' else '100110299' end)
          |when subCategoryId ='1964'   then   (case when title rlike '方便面' then '100210207'
          |                                          when title rlike '调味品' then '100210202'
          |
          |                                          else '100210299' end)
          |when subCategoryId ='1939'   then   (case when title rlike '水果'  then '100210301' else '100210302' end)
          |when subCategoryId ='1935'   then   (case when title rlike '方便面' then '100210207'
          |                                          when title rlike '调味品' then '100210202'
          |
          |                                          else '100210299' end)
          |when subCategoryId ='1813'   then   (case when title rlike '其他女装' then '100110299'
          |                                          when title rlike '童装' then '100200601'
          |
          |                                          else '100110599' end)
          |when subCategoryId ='1439'   then   (case when title rlike '其他个护化妆'  then 'None' else 'None' end)
          |when subCategoryId ='1436'   then   (case when title rlike '沐浴'  then '100190601' else '100200401' end)
          |when subCategoryId ='1435'   then   (case when title rlike '宝宝洗浴'  then '100200401'  else 'None' end)
          |when subCategoryId ='1393'   then   (case when title rlike '面条' then '100210206' else '100210299' end)
          |when subCategoryId ='747'    then   (case when title rlike '男靴' then '100110302'
          |                                          when title rlike '休闲鞋' then '100110304'
          |                                          when title rlike '户外鞋' then '100110399'
          |                                          else '100130199' end)
          |when subCategoryId ='511'    then   (case when title rlike '其他箱包皮具' then 'None' else 'None' end)
          |when subCategoryId ='509'    then   (case when title rlike '其他箱包皮具' then 'None' else 'None' end)
          |when subCategoryId ='458'    then   (case when title rlike '蒸汽' and title rlike '眼罩'  then '100251002' else '100190299' end)
          |when subCategoryId ='405' and title rlike '洗手液'    then  '100190604'
          |when subCategoryId ='1438'  then  '100190604'
          |when subCategoryId ='1299'  then  '100190604'
          |
          |when subCategoryId='2195'  then '100190801'
          |when subCategoryId='2488'  then (case when  title rlike '车载' then '100180399' else  '100150199' end )
          |when subCategoryId='838'   then '100110201'
          |when subCategoryId='2520' then 'None'
          |when subCategoryId='2744' then '100110299'
          |when subCategoryId='2746' then '100110299'
          |when subCategoryId='2598' then '100180399'
          |when subCategoryId='2424' then (case when title rlike '牙膏' then '100190701' else '100140408' end )
          |when subCategoryId='1293' then (case when title rlike '牙膏' then '100190701' else '100140408' end )
          |when subCategoryId ='400'   then   (case when title rlike '电动' or title rlike '震动' or title rlike '声波' or title rlike '振动' then '100140408' else '100190703' end)
          |when subCategoryId ='1515'  or subCategoryId ='1504'  or subCategoryId ='1665'
          |     then   (case when title rlike '蒸汽'  and title rlike '眼罩' then '100251002'
          |                  else '100220499'
          |                  end)
          |when subCategoryId ='1895' or subCategoryId ='1893' then
          |                       (case when title rlike '面膜'  then '100190104'
          |                        when title rlike '面霜'  then '100190103'
          |                        when title rlike '卸妆'  then '100190201'
          |                        when title rlike '精萃' or title rlike '精萃' then '100190107'
          |                        when title rlike '唇' or title rlike '口红' then '100190204'
          |                        else 'None' end
          |                       )
          |when subCategoryId ='1441' or subCategoryId ='1675'
          |     then (case when title rlike '洗衣液' then '100220201'
          |               when title rlike '洗衣粉' then '100220202'
          |                when title rlike '消毒液' then '100220203'
          |                when title rlike '牙膏' then '100140408'
          |                else 'None' end)
          |when subCategoryId ='1508' then "100250399"
          |
          |else 'None'
          |end thirdCategoryId,
          |'None' fourthCategoryId,
          |case
          |when subCategoryId ='2532'   then   (case when title rlike '笔记本电脑' then 'BBBDN_2532' else '2532' end)
          |when subCategoryId ='2338'   then   (case when title rlike '牙膏' then 'YG_2338' else '2338' end)
          |when subCategoryId ='2157'   then   (case when title rlike '裙子'  then 'QZ_2157' else '2157' end)
          |when subCategoryId ='2075'   then   (case when title rlike '男装西裤' then 'XK_2075' else '2075' end)
          |when subCategoryId ='1964'   then   (case when title rlike '方便面' then 'FBM_1964'
          |                                          when title rlike '调味品' then 'TWP_1964'
          |                                          else '1964' end)
          |when subCategoryId ='1939'   then   (case when title rlike '水果'  then 'SG_1939' else 'SC_1939' end)
          |when subCategoryId ='1935'   then   (case when title rlike '方便面' then 'FBM_1935'
          |                                          when title rlike '调味品' then 'TWP_1935'
          |                                          else '1935' end)
          |when subCategoryId ='1813'   then   (case when title rlike '其他女装' then 'QTNZ_1913'
          |                                          when title rlike '童装' then 'TZ_1913'
          |                                          else '1813' end)
          |when subCategoryId ='1436'   then   (case when title rlike '沐浴'  then 'MY_1436' else 'BBMY_1436' end)
          |when subCategoryId ='1435'   then   (case when title rlike '宝宝洗浴'  then 'BBMY_1435'  else '1435' end)
          |when subCategoryId ='1393'   then   (case when title rlike '面条' then 'MT/GM_1393' else '1393' end)
          |when subCategoryId ='747'    then   (case when title rlike '男靴' then 'NX_747'
          |                                          when title rlike '休闲鞋' then 'XXX_747'
          |                                          when title rlike '户外鞋' then '747'
          |                                          else '747' end)
          |when subCategoryId ='458'    then   (case when title rlike '蒸汽' and title rlike '眼罩'  then 'ZQYZ_458' else '458' end)
          |when subCategoryId='838' then 'LF_838'
          |when subCategoryId ='400'   then   (case when title rlike '电动' or title rlike '震动' or title rlike '声波' or title rlike '振动' then '400' else 'YS_400' end)
          |
          |else subCategoryId
          |end customCategoryId,
          |
          |
          |*
          |from
          |jumei_diff
        """.stripMargin)
    data_001
  }

  def relate_cate(data:DataFrame,spark:SparkSession) = {
//    读取分类表
    val cate = spark.read.json(JuMei.jumeiCate_Url)
      .dropDuplicates("subCategoryId")
      .select("subCategoryId","firstCategoryId","secondCategoryId","thirdCategoryId","fourthCategoryId")
//    关联不上的分类
    val cate_fill = Map("firstCategoryId"->"10099","secondCategoryId"->"None","thirdCategoryId"->"None","fourthCategoryId"->"None")
//原始数据与分类表进行关联
    val data_002 = data.join(cate,Seq("subCategoryId"),"left").na.fill(cate_fill)
    data_002
  }


    def month_12_cate(data:DataFrame,spark:SparkSession) = {

    val ca = spark.read.json("C:\\Users\\o2o-rd-0007\\Desktop\\pe\\jumei")
    data.registerTempTable("da_2")
    ca.registerTempTable("ca_2")

    val res = spark.sqlContext.sql(
      """
        |select t1.*,
        |t2.rootCategoryId,
        |t2.rootCategoryName,
        |t2.categoryId,
        |t2.categoryName,
        |t2.subCategoryName,
        |t2.subCategoryId,
        |t2.firstCategoryId,
        |t2.secondCategoryId,
        |t2.thirdCategoryId,
        |t2.fourthCategoryId
        |
        |from da_2 t1
        |left join
        |ca_2 t2
        |on t1.good_id = t2.good_id
        |where t2.good_id is null
      """.stripMargin)

    println(res.count())
    res
  }
}
